HDFS-5856. DataNode.checkDiskError might throw NPE. Contributed by Josh Elser.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1563064 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Suresh Srinivas 2014-01-31 05:35:30 +00:00
parent ed097d24fe
commit 7039b776c6
3 changed files with 43 additions and 7 deletions

View File

@ -310,6 +310,9 @@ Release 2.4.0 - UNRELEASED
HDFS-5843. DFSClient.getFileChecksum() throws IOException if checksum is
disabled. (Laurent Goujon via jing9)
HDFS-5856. DataNode.checkDiskError might throw NPE.
(Josh Elser via suresh)
Release 2.3.0 - UNRELEASED
INCOMPATIBLE CHANGES

View File

@ -36,6 +36,7 @@ import java.net.SocketTimeoutException;
import java.net.URI;
import java.net.UnknownHostException;
import java.nio.channels.ClosedByInterruptException;
import java.nio.channels.ClosedChannelException;
import java.nio.channels.SocketChannel;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
@ -51,7 +52,6 @@ import java.util.concurrent.atomic.AtomicInteger;
import javax.management.ObjectName;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
@ -1324,12 +1324,7 @@ public class DataNode extends Configured
protected void checkDiskError(Exception e ) throws IOException {
LOG.warn("checkDiskError: exception: ", e);
if (e instanceof SocketException || e instanceof SocketTimeoutException
|| e instanceof ClosedByInterruptException
|| e.getMessage().startsWith("An established connection was aborted")
|| e.getMessage().startsWith("Broken pipe")
|| e.getMessage().startsWith("Connection reset")
|| e.getMessage().contains("java.nio.channels.SocketChannel")) {
if (isNetworkRelatedException(e)) {
LOG.info("Not checking disk as checkDiskError was called on a network" +
" related exception");
return;
@ -1342,6 +1337,28 @@ public class DataNode extends Configured
}
}
/**
* Check if the provided exception looks like it's from a network error
* @param e the exception from a checkDiskError call
* @return true if this exception is network related, false otherwise
*/
protected boolean isNetworkRelatedException(Exception e) {
if (e instanceof SocketException
|| e instanceof SocketTimeoutException
|| e instanceof ClosedChannelException
|| e instanceof ClosedByInterruptException) {
return true;
}
String msg = e.getMessage();
return null != msg
&& (msg.startsWith("An established connection was aborted")
|| msg.startsWith("Broken pipe")
|| msg.startsWith("Connection reset")
|| msg.contains("java.nio.channels.SocketChannel"));
}
/**
* Check if there is a disk failure and if so, handle the error
*/

View File

@ -18,12 +18,16 @@
package org.apache.hadoop.hdfs.server.datanode;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.io.DataOutputStream;
import java.io.File;
import java.net.InetSocketAddress;
import java.net.Socket;
import java.net.SocketException;
import java.net.SocketTimeoutException;
import java.nio.channels.ClosedChannelException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
@ -196,4 +200,16 @@ public class TestDiskError {
}
}
}
@Test
public void testNetworkErrorsIgnored() {
DataNode dn = cluster.getDataNodes().iterator().next();
assertTrue(dn.isNetworkRelatedException(new SocketException()));
assertTrue(dn.isNetworkRelatedException(new SocketTimeoutException()));
assertTrue(dn.isNetworkRelatedException(new ClosedChannelException()));
assertTrue(dn.isNetworkRelatedException(new Exception("Broken pipe foo bar")));
assertFalse(dn.isNetworkRelatedException(new Exception()));
assertFalse(dn.isNetworkRelatedException(new Exception("random problem")));
}
}