HDFS-3027. Implement a simple NN health check. Contributed by Aaron T. Myers.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1295300 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
978a8050e2
commit
7b6b204924
@ -240,3 +240,5 @@ HDFS-3019. Fix silent failure of TestEditLogJournalFailures (todd)
|
|||||||
HDFS-2958. Sweep for remaining proxy construction which doesn't go through failover path. (atm)
|
HDFS-2958. Sweep for remaining proxy construction which doesn't go through failover path. (atm)
|
||||||
|
|
||||||
HDFS-2920. fix remaining TODO items. (atm and todd)
|
HDFS-2920. fix remaining TODO items. (atm and todd)
|
||||||
|
|
||||||
|
HDFS-3027. Implement a simple NN health check. (atm)
|
||||||
|
@ -3082,7 +3082,7 @@ private NNHAStatusHeartbeat createHaStatusHeartbeat() {
|
|||||||
*
|
*
|
||||||
* @return true if there were sufficient resources available, false otherwise.
|
* @return true if there were sufficient resources available, false otherwise.
|
||||||
*/
|
*/
|
||||||
private boolean nameNodeHasResourcesAvailable() {
|
boolean nameNodeHasResourcesAvailable() {
|
||||||
return hasResourcesAvailable;
|
return hasResourcesAvailable;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3090,7 +3090,7 @@ private boolean nameNodeHasResourcesAvailable() {
|
|||||||
* Perform resource checks and cache the results.
|
* Perform resource checks and cache the results.
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
private void checkAvailableResources() throws IOException {
|
void checkAvailableResources() {
|
||||||
Preconditions.checkState(nnResourceChecker != null,
|
Preconditions.checkState(nnResourceChecker != null,
|
||||||
"nnResourceChecker not initialized");
|
"nnResourceChecker not initialized");
|
||||||
hasResourcesAvailable = nnResourceChecker.hasAvailableDiskSpace();
|
hasResourcesAvailable = nnResourceChecker.hasAvailableDiskSpace();
|
||||||
@ -5188,4 +5188,9 @@ ReentrantReadWriteLock getFsLockForTests() {
|
|||||||
public SafeModeInfo getSafeModeInfoForTests() {
|
public SafeModeInfo getSafeModeInfoForTests() {
|
||||||
return safeMode;
|
return safeMode;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public void setNNResourceChecker(NameNodeResourceChecker nnResourceChecker) {
|
||||||
|
this.nnResourceChecker = nnResourceChecker;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -920,8 +920,11 @@ synchronized void monitorHealth()
|
|||||||
if (!haEnabled) {
|
if (!haEnabled) {
|
||||||
return; // no-op, if HA is not enabled
|
return; // no-op, if HA is not enabled
|
||||||
}
|
}
|
||||||
// TODO(HA): implement health check
|
getNamesystem().checkAvailableResources();
|
||||||
return;
|
if (!getNamesystem().nameNodeHasResourcesAvailable()) {
|
||||||
|
throw new HealthCheckFailedException(
|
||||||
|
"The NameNode has no resources available");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
synchronized void transitionToActive()
|
synchronized void transitionToActive()
|
||||||
|
@ -46,7 +46,7 @@
|
|||||||
* are added by default, and arbitrary extra volumes may be configured as well.
|
* are added by default, and arbitrary extra volumes may be configured as well.
|
||||||
*/
|
*/
|
||||||
@InterfaceAudience.Private
|
@InterfaceAudience.Private
|
||||||
class NameNodeResourceChecker {
|
public class NameNodeResourceChecker {
|
||||||
private static final Log LOG = LogFactory.getLog(NameNodeResourceChecker.class.getName());
|
private static final Log LOG = LogFactory.getLog(NameNodeResourceChecker.class.getName());
|
||||||
|
|
||||||
// Space (in bytes) reserved per volume.
|
// Space (in bytes) reserved per volume.
|
||||||
@ -176,8 +176,7 @@ private void addDirToCheck(URI directoryToCheck, boolean required)
|
|||||||
* least one redundant volume and all of the required volumes, false
|
* least one redundant volume and all of the required volumes, false
|
||||||
* otherwise.
|
* otherwise.
|
||||||
*/
|
*/
|
||||||
boolean hasAvailableDiskSpace()
|
public boolean hasAvailableDiskSpace() {
|
||||||
throws IOException {
|
|
||||||
return NameNodeResourcePolicy.areResourcesAvailable(volumes.values(),
|
return NameNodeResourcePolicy.areResourcesAvailable(volumes.values(),
|
||||||
minimumRedundantVolumes);
|
minimumRedundantVolumes);
|
||||||
}
|
}
|
||||||
|
@ -1145,7 +1145,14 @@ public NameNode getNameNode() {
|
|||||||
*/
|
*/
|
||||||
public NamenodeProtocols getNameNodeRpc() {
|
public NamenodeProtocols getNameNodeRpc() {
|
||||||
checkSingleNameNode();
|
checkSingleNameNode();
|
||||||
return getNameNode(0).getRpcServer();
|
return getNameNodeRpc(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get an instance of the NameNode's RPC handler.
|
||||||
|
*/
|
||||||
|
public NamenodeProtocols getNameNodeRpc(int nnIndex) {
|
||||||
|
return getNameNode(nnIndex).getRpcServer();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -0,0 +1,73 @@
|
|||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hdfs.server.namenode.ha;
|
||||||
|
|
||||||
|
import static org.junit.Assert.fail;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.ha.HealthCheckFailedException;
|
||||||
|
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||||
|
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.NameNodeResourceChecker;
|
||||||
|
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
|
||||||
|
import org.apache.hadoop.test.GenericTestUtils;
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.mockito.Mockito;
|
||||||
|
|
||||||
|
public class TestNNHealthCheck {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNNHealthCheck() throws IOException {
|
||||||
|
MiniDFSCluster cluster = null;
|
||||||
|
try {
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
cluster = new MiniDFSCluster.Builder(conf)
|
||||||
|
.numDataNodes(0)
|
||||||
|
.nnTopology(MiniDFSNNTopology.simpleHATopology())
|
||||||
|
.build();
|
||||||
|
|
||||||
|
NameNodeResourceChecker mockResourceChecker = Mockito.mock(
|
||||||
|
NameNodeResourceChecker.class);
|
||||||
|
Mockito.doReturn(true).when(mockResourceChecker).hasAvailableDiskSpace();
|
||||||
|
cluster.getNameNode(0).getNamesystem()
|
||||||
|
.setNNResourceChecker(mockResourceChecker);
|
||||||
|
|
||||||
|
NamenodeProtocols rpc = cluster.getNameNodeRpc(0);
|
||||||
|
|
||||||
|
// Should not throw error, which indicates healthy.
|
||||||
|
rpc.monitorHealth();
|
||||||
|
|
||||||
|
Mockito.doReturn(false).when(mockResourceChecker).hasAvailableDiskSpace();
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Should throw error - NN is unhealthy.
|
||||||
|
rpc.monitorHealth();
|
||||||
|
fail("Should not have succeeded in calling monitorHealth");
|
||||||
|
} catch (HealthCheckFailedException hcfe) {
|
||||||
|
GenericTestUtils.assertExceptionContains(
|
||||||
|
"The NameNode has no resources available", hcfe);
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
if (cluster != null) {
|
||||||
|
cluster.shutdown();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user