From 16ef8dd3a5e782ee9546ba5a7232f55fc3fe5c0c Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Thu, 25 Apr 2013 04:25:58 +0000 Subject: [PATCH] HDFS-4745. TestDataTransferKeepalive#testSlowReader has race condition that causes sporadic failure. Contributed by Chris Nauroth. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1475623 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../hdfs/TestDataTransferKeepalive.java | 24 +++++++++++++++---- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 5bc0b87ecbc..a6de677ed73 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -588,6 +588,9 @@ Release 2.0.5-beta - UNRELEASED HDFS-4739. NN can miscalculate the number of extra edit log segments to retain. (atm) + HDFS-4745. TestDataTransferKeepalive#testSlowReader has race condition that + causes sporadic failure. (Chris Nauroth via suresh) + Release 2.0.4-alpha - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataTransferKeepalive.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataTransferKeepalive.java index 3c9ee25b111..bf4e13bd027 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataTransferKeepalive.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataTransferKeepalive.java @@ -146,7 +146,15 @@ public class TestDataTransferKeepalive { stm.read(); assertXceiverCount(1); - Thread.sleep(WRITE_TIMEOUT + 1000); + // Poll for 0 running xceivers. Allow up to 5 seconds for some slack. + long totalSleepTime = 0; + long sleepTime = WRITE_TIMEOUT + 100; + while (getXceiverCountWithoutServer() > 0 && totalSleepTime < 5000) { + Thread.sleep(sleepTime); + totalSleepTime += sleepTime; + sleepTime = 100; + } + // DN should time out in sendChunks, and this should force // the xceiver to exit. assertXceiverCount(0); @@ -190,9 +198,7 @@ public class TestDataTransferKeepalive { } private void assertXceiverCount(int expected) { - // Subtract 1, since the DataXceiverServer - // counts as one - int count = dn.getXceiverCount() - 1; + int count = getXceiverCountWithoutServer(); if (count != expected) { ReflectionUtils.printThreadInfo( new PrintWriter(System.err), @@ -201,4 +207,14 @@ public class TestDataTransferKeepalive { count); } } + + /** + * Returns the datanode's xceiver count, but subtracts 1, since the + * DataXceiverServer counts as one. + * + * @return int xceiver count, not including DataXceiverServer + */ + private int getXceiverCountWithoutServer() { + return dn.getXceiverCount() - 1; + } }