HDFS-5856. DataNode.checkDiskError might throw NPE. Contributed by Josh Elser.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1563064 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Suresh Srinivas 2014-01-31 05:35:30 +00:00
parent ed097d24fe
commit 7039b776c6
3 changed files with 43 additions and 7 deletions

View File

@ -310,6 +310,9 @@ Release 2.4.0 - UNRELEASED
HDFS-5843. DFSClient.getFileChecksum() throws IOException if checksum is HDFS-5843. DFSClient.getFileChecksum() throws IOException if checksum is
disabled. (Laurent Goujon via jing9) disabled. (Laurent Goujon via jing9)
HDFS-5856. DataNode.checkDiskError might throw NPE.
(Josh Elser via suresh)
Release 2.3.0 - UNRELEASED Release 2.3.0 - UNRELEASED
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -36,6 +36,7 @@ import java.net.SocketTimeoutException;
import java.net.URI; import java.net.URI;
import java.net.UnknownHostException; import java.net.UnknownHostException;
import java.nio.channels.ClosedByInterruptException; import java.nio.channels.ClosedByInterruptException;
import java.nio.channels.ClosedChannelException;
import java.nio.channels.SocketChannel; import java.nio.channels.SocketChannel;
import java.security.PrivilegedExceptionAction; import java.security.PrivilegedExceptionAction;
import java.util.ArrayList; import java.util.ArrayList;
@ -51,7 +52,6 @@ import java.util.concurrent.atomic.AtomicInteger;
import javax.management.ObjectName; import javax.management.ObjectName;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience;
@ -1324,12 +1324,7 @@ public class DataNode extends Configured
protected void checkDiskError(Exception e ) throws IOException { protected void checkDiskError(Exception e ) throws IOException {
LOG.warn("checkDiskError: exception: ", e); LOG.warn("checkDiskError: exception: ", e);
if (e instanceof SocketException || e instanceof SocketTimeoutException if (isNetworkRelatedException(e)) {
|| e instanceof ClosedByInterruptException
|| e.getMessage().startsWith("An established connection was aborted")
|| e.getMessage().startsWith("Broken pipe")
|| e.getMessage().startsWith("Connection reset")
|| e.getMessage().contains("java.nio.channels.SocketChannel")) {
LOG.info("Not checking disk as checkDiskError was called on a network" + LOG.info("Not checking disk as checkDiskError was called on a network" +
" related exception"); " related exception");
return; return;
@ -1342,6 +1337,28 @@ public class DataNode extends Configured
} }
} }
/**
 * Tells whether an exception handed to checkDiskError appears to originate
 * from the network.
 * <p>
 * The exception's type is examined first. If the type is inconclusive, the
 * exception's message (when it has one) is matched against a fixed set of
 * known network failure strings.
 *
 * @param e the exception from a checkDiskError call
 * @return true if this exception is network related, false otherwise
 */
protected boolean isNetworkRelatedException(Exception e) {
  // Socket and channel failures count as network errors whatever they say.
  final boolean networkType = e instanceof ClosedByInterruptException
      || e instanceof ClosedChannelException
      || e instanceof SocketTimeoutException
      || e instanceof SocketException;
  if (networkType) {
    return true;
  }

  // An exception may carry no message at all; nothing left to match then.
  final String text = e.getMessage();
  if (text == null) {
    return false;
  }

  if (text.contains("java.nio.channels.SocketChannel")) {
    return true;
  }
  return text.startsWith("Connection reset")
      || text.startsWith("Broken pipe")
      || text.startsWith("An established connection was aborted");
}
/** /**
* Check if there is a disk failure and if so, handle the error * Check if there is a disk failure and if so, handle the error
*/ */

View File

@ -18,12 +18,16 @@
package org.apache.hadoop.hdfs.server.datanode; package org.apache.hadoop.hdfs.server.datanode;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertTrue;
import java.io.DataOutputStream; import java.io.DataOutputStream;
import java.io.File; import java.io.File;
import java.net.InetSocketAddress; import java.net.InetSocketAddress;
import java.net.Socket; import java.net.Socket;
import java.net.SocketException;
import java.net.SocketTimeoutException;
import java.nio.channels.ClosedChannelException;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
@ -196,4 +200,16 @@ public class TestDiskError {
} }
} }
} }
/**
 * Verifies that DataNode.isNetworkRelatedException accepts socket and
 * channel exceptions as well as a "Broken pipe" message, and rejects
 * exceptions with no message or an unrelated message.
 */
@Test
public void testNetworkErrorsIgnored() {
  final DataNode dataNode = cluster.getDataNodes().iterator().next();

  // Each of these must be classified as a network problem.
  final Exception[] networkErrors = {
      new SocketException(),
      new SocketTimeoutException(),
      new ClosedChannelException(),
      new Exception("Broken pipe foo bar")
  };
  for (Exception networkError : networkErrors) {
    assertTrue(dataNode.isNetworkRelatedException(networkError));
  }

  // A plain exception without a message, or with an unrelated message,
  // must not be classified as a network problem.
  final Exception[] otherErrors = {
      new Exception(),
      new Exception("random problem")
  };
  for (Exception otherError : otherErrors) {
    assertFalse(dataNode.isNetworkRelatedException(otherError));
  }
}
} }