HDFS-11711. DN should not delete the block on "Too many open files" exception. Contributed by Brahma Reddy Battula.

(cherry picked from commit fca08f8362)
This commit is contained in:
Brahma Reddy Battula 2017-06-07 16:24:50 +08:00
parent 4a391c72d4
commit 8a16846f5d
3 changed files with 75 additions and 4 deletions

View File

@ -305,6 +305,7 @@ class BlockSender implements java.io.Closeable {
LengthInputStream metaIn = null;
boolean keepMetaInOpen = false;
try {
DataNodeFaultInjector.get().throwTooManyOpenFiles();
metaIn = datanode.data.getMetaDataInputStream(block);
if (!corruptChecksumOk || metaIn != null) {
if (metaIn == null) {
@ -334,10 +335,14 @@ class BlockSender implements java.io.Closeable {
LOG.warn("Could not find metadata file for " + block);
}
} catch (FileNotFoundException e) {
// The replica is on its volume map but not on disk
datanode.notifyNamenodeDeletedBlock(block, replica.getStorageUuid());
datanode.data.invalidate(block.getBlockPoolId(),
new Block[]{block.getLocalBlock()});
if ((e.getMessage() != null) && !(e.getMessage()
.contains("Too many open files"))) {
// The replica is on its volume map but not on disk
datanode
.notifyNamenodeDeletedBlock(block, replica.getStorageUuid());
datanode.data.invalidate(block.getBlockPoolId(),
new Block[] {block.getLocalBlock()});
}
throw e;
} finally {
if (!keepMetaInOpen) {

View File

@ -21,6 +21,7 @@ import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.classification.InterfaceAudience;
import java.io.FileNotFoundException;
import java.io.IOException;
/**
@ -58,4 +59,7 @@ public class DataNodeFaultInjector {
/**
 * Fault-injection hook whose default implementation is empty (a no-op).
 * Tests can replace the injector instance to make this throw.
 *
 * NOTE(review): the call site is not visible here; presumably this is
 * invoked while a write pipeline to the mirror is being set up - confirm.
 *
 * @param replicaInfo the replica passed in by the caller
 * @param mirrorAddr address string passed in by the caller (presumably the
 *     downstream mirror datanode - confirm against the call site)
 * @throws IOException never thrown by this default implementation; declared
 *     so that overriding injectors may throw it
 */
public void failPipeline(ReplicaInPipelineInterface replicaInfo,
String mirrorAddr) throws IOException { }
/**
 * Fault-injection hook whose default implementation is empty (a no-op).
 * BlockSender calls it immediately before opening the block's metadata
 * input stream, so a test injector can make it throw a
 * FileNotFoundException (for example with the message
 * "Too many open files") to simulate that failure.
 *
 * @throws FileNotFoundException never thrown by this default
 *     implementation; declared so that overriding or mocked injectors
 *     may throw it
 */
public void throwTooManyOpenFiles() throws FileNotFoundException {
}
}

View File

@ -24,8 +24,10 @@ import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
import static org.junit.Assert.*;
import java.io.Closeable;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.lang.management.ManagementFactory;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.List;
@ -44,8 +46,10 @@ import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.metrics2.MetricsRecordBuilder;
@ -331,4 +335,62 @@ public class TestDataNodeMetrics {
}
}
}
/**
 * Verifies that the DataNode keeps a replica when opening it fails with a
 * {@link FileNotFoundException} whose message contains
 * "Too many open files".
 *
 * The test writes a file on a single-DataNode cluster, installs a mocked
 * {@link DataNodeFaultInjector} that throws such an exception, tries to
 * construct a {@code BlockSender} for the block, and then checks that the
 * block is still valid on the DataNode and still has one location.
 *
 * @throws Exception on any unexpected failure
 */
@Test
public void testDNShouldNotDeleteBlockONTooManyOpenFiles()
    throws Exception {
  Configuration conf = new HdfsConfiguration();
  conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1L);
  conf.setLong(HdfsClientConfigKeys.Retry.WINDOW_BASE_KEY, 1);
  // Remember the current injector so it can be restored afterwards.
  DataNodeFaultInjector oldInjector = DataNodeFaultInjector.get();
  MiniDFSCluster cluster =
      new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
  final DataNodeFaultInjector injector =
      Mockito.mock(DataNodeFaultInjector.class);
  try {
    // wait until the cluster is up
    cluster.waitActive();
    DistributedFileSystem fs = cluster.getFileSystem();
    Path p = new Path("/testShouldThrowTMP");
    DFSTestUtil.writeFile(fs, p, "testdata");
    // Before the DN throws "Too many open files"
    verifyBlockLocations(fs, p, 1);
    Mockito.doThrow(new FileNotFoundException("Too many open files")).
        when(injector).
        throwTooManyOpenFiles();
    DataNodeFaultInjector.set(injector);
    ExtendedBlock b =
        fs.getClient().getLocatedBlocks(p.toString(), 0).get(0).getBlock();
    try {
      new BlockSender(b, 0, -1, false, true, true,
          cluster.getDataNodes().get(0), null,
          CachingStrategy.newDefaultStrategy());
      fail("Must throw FileNotFoundException");
    } catch (FileNotFoundException fe) {
      // Null-safe so an unexpected exception fails the assertion
      // instead of raising a NullPointerException.
      assertTrue("Should throw too many open files",
          fe.getMessage() != null
              && fe.getMessage().contains("Too many open files"));
    }
    cluster.triggerHeartbeats(); // IBR delete ack
    // After the DN throws "Too many open files"
    assertTrue(cluster.getDataNodes().get(0).getFSDataset().isValidBlock(b));
    verifyBlockLocations(fs, p, 1);
  } finally {
    // Restore the injector even if shutdown fails, so the mock does not
    // leak into other tests through the static injector slot.
    try {
      cluster.shutdown();
    } finally {
      DataNodeFaultInjector.set(oldInjector);
    }
  }
}
/**
 * Waits until the first block of the given file reports the expected
 * number of locations, polling every 1000 ms for at most 6000 ms.
 *
 * The block locations are fetched again on every poll, so a change that
 * happens while waiting is actually observed.
 *
 * @param fs file system whose client is used to look up block locations
 * @param p path of the file to inspect
 * @param expected number of locations the first block must have
 * @throws IOException declared for compatibility with existing callers
 * @throws TimeoutException if the expected count is not seen in time
 * @throws InterruptedException if the wait is interrupted
 */
private void verifyBlockLocations(final DistributedFileSystem fs,
    final Path p, final int expected)
    throws IOException, TimeoutException, InterruptedException {
  GenericTestUtils.waitFor(new Supplier<Boolean>() {
    @Override
    public Boolean get() {
      try {
        // Re-query on each poll; a single snapshot would never change.
        final LocatedBlock lb =
            fs.getClient().getLocatedBlocks(p.toString(), 0).get(0);
        return lb.getLocations().length == expected;
      } catch (IOException e) {
        // Fail fast and keep the cause instead of polling until timeout.
        throw new IllegalStateException(
            "Failed to fetch block locations for " + p, e);
      }
    }
  }, 1000, 6000);
}
}