HDFS-3027. Implement a simple NN health check. Contributed by Aaron T. Myers.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1295300 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Aaron Myers 2012-02-29 21:57:56 +00:00
parent 978a8050e2
commit 7b6b204924
6 changed files with 97 additions and 8 deletions

View File

@ -240,3 +240,5 @@ HDFS-3019. Fix silent failure of TestEditLogJournalFailures (todd)
HDFS-2958. Sweep for remaining proxy construction which doesn't go through failover path. (atm)
HDFS-2920. fix remaining TODO items. (atm and todd)
HDFS-3027. Implement a simple NN health check. (atm)

View File

@ -3082,7 +3082,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
*
* @return true if there were sufficient resources available, false otherwise.
*/
private boolean nameNodeHasResourcesAvailable() {
// Package-private so NameNode's HA health check (and tests) can read the cached result.
boolean nameNodeHasResourcesAvailable() {
return hasResourcesAvailable; // cached flag, refreshed by checkAvailableResources()
}
@ -3090,7 +3090,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
* Perform resource checks and cache the results.
*/
private void checkAvailableResources() throws IOException {
void checkAvailableResources() {
Preconditions.checkState(nnResourceChecker != null,
"nnResourceChecker not initialized");
hasResourcesAvailable = nnResourceChecker.hasAvailableDiskSpace();
@ -5188,4 +5188,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
public SafeModeInfo getSafeModeInfoForTests() {
return safeMode;
}
// Test hook: replaces the resource checker so tests can stub disk-space availability.
@VisibleForTesting
public void setNNResourceChecker(NameNodeResourceChecker nnResourceChecker) {
this.nnResourceChecker = nnResourceChecker;
}
}

View File

@ -920,8 +920,11 @@ public class NameNode {
if (!haEnabled) {
return; // no-op, if HA is not enabled
}
// TODO(HA): implement health check
return;
getNamesystem().checkAvailableResources();
if (!getNamesystem().nameNodeHasResourcesAvailable()) {
throw new HealthCheckFailedException(
"The NameNode has no resources available");
}
}
synchronized void transitionToActive()

View File

@ -46,7 +46,7 @@ import com.google.common.base.Predicate;
* are added by default, and arbitrary extra volumes may be configured as well.
*/
@InterfaceAudience.Private
class NameNodeResourceChecker {
public class NameNodeResourceChecker {
private static final Log LOG = LogFactory.getLog(NameNodeResourceChecker.class.getName());
// Space (in bytes) reserved per volume.
@ -176,8 +176,7 @@ class NameNodeResourceChecker {
* least one redundant volume and all of the required volumes, false
* otherwise.
*/
boolean hasAvailableDiskSpace()
throws IOException {
public boolean hasAvailableDiskSpace() {
// Delegates the policy decision to NameNodeResourcePolicy, checking the
// currently-tracked volumes against the configured redundancy minimum.
return NameNodeResourcePolicy.areResourcesAvailable(volumes.values(),
minimumRedundantVolumes);
}

View File

@ -1145,7 +1145,14 @@ public class MiniDFSCluster {
*/
public NamenodeProtocols getNameNodeRpc() {
checkSingleNameNode();
return getNameNode(0).getRpcServer();
return getNameNodeRpc(0);
}
/**
* Get an instance of the NameNode's RPC handler.
*
* @param nnIndex index of the NameNode within this cluster whose RPC
*                server should be returned
* @return the RPC protocols object of the selected NameNode
*/
public NamenodeProtocols getNameNodeRpc(int nnIndex) {
return getNameNode(nnIndex).getRpcServer();
}
/**

View File

@ -0,0 +1,73 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.namenode.ha;
import static org.junit.Assert.fail;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.ha.HealthCheckFailedException;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
import org.apache.hadoop.hdfs.server.namenode.NameNodeResourceChecker;
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
import org.apache.hadoop.test.GenericTestUtils;
import org.junit.Test;
import org.mockito.Mockito;
public class TestNNHealthCheck {

  /**
   * Verify that the NN health check reflects the resource checker's answer:
   * healthy when disk space is reported available, and a
   * {@link HealthCheckFailedException} otherwise.
   */
  @Test
  public void testNNHealthCheck() throws IOException {
    MiniDFSCluster cluster = null;
    try {
      Configuration conf = new Configuration();
      cluster = new MiniDFSCluster.Builder(conf)
          .numDataNodes(0)
          .nnTopology(MiniDFSNNTopology.simpleHATopology())
          .build();

      // Swap in a stubbed resource checker so this test fully controls
      // what the health check observes.
      NameNodeResourceChecker stubChecker = Mockito.mock(
          NameNodeResourceChecker.class);
      Mockito.doReturn(true).when(stubChecker).hasAvailableDiskSpace();
      cluster.getNameNode(0).getNamesystem()
          .setNNResourceChecker(stubChecker);

      NamenodeProtocols nnRpc = cluster.getNameNodeRpc(0);

      // With resources available, monitorHealth() must return normally.
      nnRpc.monitorHealth();

      // Flip the stub to "no disk space": the health check must now fail.
      Mockito.doReturn(false).when(stubChecker).hasAvailableDiskSpace();
      try {
        nnRpc.monitorHealth();
        fail("Should not have succeeded in calling monitorHealth");
      } catch (HealthCheckFailedException hcfe) {
        GenericTestUtils.assertExceptionContains(
            "The NameNode has no resources available", hcfe);
      }
    } finally {
      if (cluster != null) {
        cluster.shutdown();
      }
    }
  }
}