HDFS-3027. Implement a simple NN health check. Contributed by Aaron T. Myers.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1295300 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
978a8050e2
commit
7b6b204924
|
@ -240,3 +240,5 @@ HDFS-3019. Fix silent failure of TestEditLogJournalFailures (todd)
|
|||
HDFS-2958. Sweep for remaining proxy construction which doesn't go through failover path. (atm)
|
||||
|
||||
HDFS-2920. fix remaining TODO items. (atm and todd)
|
||||
|
||||
HDFS-3027. Implement a simple NN health check. (atm)
|
||||
|
|
|
@ -3082,7 +3082,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
|||
*
|
||||
* @return true if there were sufficient resources available, false otherwise.
|
||||
*/
|
||||
// NOTE(review): this is a diff rendering. The next row looks like the former private
// signature and the row after it its replacement without the modifier - confirm.
private boolean nameNodeHasResourcesAvailable() {
|
||||
boolean nameNodeHasResourcesAvailable() {
|
||||
// Returns the cached flag only; it is checkAvailableResources() that assigns it.
return hasResourcesAvailable;
|
||||
}
|
||||
|
||||
|
@ -3090,7 +3090,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
|||
* Perform resource checks and cache the results.
|
||||
* @throws IOException
|
||||
*/
|
||||
private void checkAvailableResources() throws IOException {
|
||||
void checkAvailableResources() {
|
||||
Preconditions.checkState(nnResourceChecker != null,
|
||||
"nnResourceChecker not initialized");
|
||||
hasResourcesAvailable = nnResourceChecker.hasAvailableDiskSpace();
|
||||
|
@ -5188,4 +5188,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
|||
public SafeModeInfo getSafeModeInfoForTests() {
|
||||
return safeMode;
|
||||
}
|
||||
|
||||
/**
 * Replaces the resource checker held by this namesystem.
 * Annotated for test use, so a test can substitute its own checker (for example a mock).
 *
 * @param nnResourceChecker the checker to store in the nnResourceChecker field
 */
@VisibleForTesting
|
||||
public void setNNResourceChecker(NameNodeResourceChecker nnResourceChecker) {
|
||||
this.nnResourceChecker = nnResourceChecker;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -920,8 +920,11 @@ public class NameNode {
|
|||
if (!haEnabled) {
|
||||
return; // no-op, if HA is not enabled
|
||||
}
|
||||
// TODO(HA): implement health check
|
||||
return;
|
||||
getNamesystem().checkAvailableResources();
|
||||
if (!getNamesystem().nameNodeHasResourcesAvailable()) {
|
||||
throw new HealthCheckFailedException(
|
||||
"The NameNode has no resources available");
|
||||
}
|
||||
}
|
||||
|
||||
synchronized void transitionToActive()
|
||||
|
|
|
@ -46,7 +46,7 @@ import com.google.common.base.Predicate;
|
|||
* are added by default, and arbitrary extra volumes may be configured as well.
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
class NameNodeResourceChecker {
|
||||
public class NameNodeResourceChecker {
|
||||
private static final Log LOG = LogFactory.getLog(NameNodeResourceChecker.class.getName());
|
||||
|
||||
// Space (in bytes) reserved per volume.
|
||||
|
@ -176,8 +176,7 @@ class NameNodeResourceChecker {
|
|||
* least one redundant volume and all of the required volumes, false
|
||||
* otherwise.
|
||||
*/
|
||||
// NOTE(review): this is a diff rendering. The first two rows look like the former
// signature (declaring IOException); the public signature below appears to replace them - confirm.
boolean hasAvailableDiskSpace()
|
||||
throws IOException {
|
||||
public boolean hasAvailableDiskSpace() {
|
||||
// The decision is delegated to NameNodeResourcePolicy, which receives the values of
// the volumes map and the minimumRedundantVolumes threshold.
return NameNodeResourcePolicy.areResourcesAvailable(volumes.values(),
|
||||
minimumRedundantVolumes);
|
||||
}
|
||||
|
|
|
@ -1145,7 +1145,14 @@ public class MiniDFSCluster {
|
|||
*/
|
||||
public NamenodeProtocols getNameNodeRpc() {
|
||||
// Presumably rejects clusters with more than one NameNode - verify against checkSingleNameNode().
checkSingleNameNode();
|
||||
// NOTE(review): this is a diff rendering. The next row looks like the former return
// statement; the row after it appears to be its replacement - confirm.
return getNameNode(0).getRpcServer();
|
||||
// Delegates to the indexed overload using index 0.
return getNameNodeRpc(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get an instance of the NameNode's RPC handler.
*
* @param nnIndex index of the NameNode, passed unchanged to getNameNode(int)
* @return the result of getRpcServer() on the NameNode at that index
|
||||
*/
|
||||
public NamenodeProtocols getNameNodeRpc(int nnIndex) {
|
||||
return getNameNode(nnIndex).getRpcServer();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -0,0 +1,73 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hdfs.server.namenode.ha;
|
||||
|
||||
import static org.junit.Assert.fail;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.ha.HealthCheckFailedException;
|
||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
|
||||
import org.apache.hadoop.hdfs.server.namenode.NameNodeResourceChecker;
|
||||
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
|
||||
import org.apache.hadoop.test.GenericTestUtils;
|
||||
import org.junit.Test;
|
||||
import org.mockito.Mockito;
|
||||
|
||||
/**
 * Checks that the monitorHealth RPC of a NameNode follows the answer given by the
 * NameNodeResourceChecker installed on its namesystem: no exception while the checker
 * reports available disk space, a HealthCheckFailedException once it does not.
 */
public class TestNNHealthCheck {
|
||||
|
||||
/**
 * Starts a mini cluster, swaps in a mocked resource checker on NameNode 0 and calls
 * monitorHealth once with the mock returning true and once with it returning false.
 *
 * @throws IOException declared by the test; nothing in this method catches it
 */
@Test
|
||||
public void testNNHealthCheck() throws IOException {
|
||||
MiniDFSCluster cluster = null;
|
||||
try {
|
||||
Configuration conf = new Configuration();
|
||||
// HA topology with zero DataNodes.
// NOTE(review): presumably HA is required because the NameNode health check is a
// no-op when HA is disabled - confirm against NameNode.
cluster = new MiniDFSCluster.Builder(conf)
|
||||
.numDataNodes(0)
|
||||
.nnTopology(MiniDFSNNTopology.simpleHATopology())
|
||||
.build();
|
||||
|
||||
// Mocked checker, so the test decides what hasAvailableDiskSpace() answers.
NameNodeResourceChecker mockResourceChecker = Mockito.mock(
|
||||
NameNodeResourceChecker.class);
|
||||
Mockito.doReturn(true).when(mockResourceChecker).hasAvailableDiskSpace();
|
||||
// The mock is installed only on the NameNode at index 0.
cluster.getNameNode(0).getNamesystem()
|
||||
.setNNResourceChecker(mockResourceChecker);
|
||||
|
||||
// RPC handle for the same NameNode (index 0) that received the mock.
NamenodeProtocols rpc = cluster.getNameNodeRpc(0);
|
||||
|
||||
// Should not throw error, which indicates healthy.
|
||||
rpc.monitorHealth();
|
||||
|
||||
// Re-stub the same mock so the next health check sees no available disk space.
Mockito.doReturn(false).when(mockResourceChecker).hasAvailableDiskSpace();
|
||||
|
||||
try {
|
||||
// Should throw error - NN is unhealthy.
|
||||
rpc.monitorHealth();
|
||||
// Reached only if monitorHealth() returned normally, which is the failure case.
fail("Should not have succeeded in calling monitorHealth");
|
||||
} catch (HealthCheckFailedException hcfe) {
|
||||
// The exception text must contain the message expected from the health check.
GenericTestUtils.assertExceptionContains(
|
||||
"The NameNode has no resources available", hcfe);
|
||||
}
|
||||
} finally {
|
||||
// cluster stays null if build() threw, hence the guard before shutdown.
if (cluster != null) {
|
||||
cluster.shutdown();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue