diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 11b67eb0d96..0ab243cc285 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -210,6 +210,9 @@ Trunk (unreleased changes)
dfs.client.block.write.replace-datanode-on-failure.enable to be mistakenly
disabled. (atm)
+ HDFS-2525. Race between BlockPoolSliceScanner and append. (Brandon Li
+ via jitendra)
+
Release 0.23.2 - UNRELEASED
INCOMPATIBLE CHANGES
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceScanner.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceScanner.java
index e3709463b41..54c1b6f3952 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceScanner.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceScanner.java
@@ -51,11 +51,8 @@ import org.apache.hadoop.hdfs.util.DataTransferThrottler;
import org.apache.hadoop.io.IOUtils;
/**
- * Performs two types of scanning:
- * Gets block files from the data directories and reconciles the
- * difference between the blocks on the disk and in memory.
- * Scans the data directories for block files under a block pool
- * and verifies that the files are not corrupt
+ * Scans the block files under a block pool and verifies that the
+ * files are not corrupt.
* This keeps track of blocks and their last verification times.
* Currently it does not modify the metadata for block.
*/
@@ -430,6 +427,19 @@ class BlockPoolSliceScanner {
return;
}
+ // If the block exists, the exception may be due to a race with a write:
+ // the BlockSender got the old block path in rbw because BlockReceiver
+ // moved the block from rbw to finalized, but BlockSender tried to open
+ // the file before BlockReceiver updated the VolumeMap. The state of the
+ // block may change again, so ignore this error here. If a block really
+ // was deleted by mistake, the DirectoryScanner should catch it.
+ if (e instanceof FileNotFoundException) {
+ LOG.info("Verification failed for " + block +
+ ". It may be due to a race with write.");
+ deleteBlock(block.getLocalBlock());
+ return;
+ }
+
LOG.warn((second ? "Second " : "First ") + "Verification failed for "
+ block, e);
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestAppendDifferentChecksum.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestAppendDifferentChecksum.java
index f296419bde5..9fbb7605d44 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestAppendDifferentChecksum.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestAppendDifferentChecksum.java
@@ -47,12 +47,6 @@ public class TestAppendDifferentChecksum {
public static void setupCluster() throws IOException {
Configuration conf = new HdfsConfiguration();
conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 4096);
-
- // disable block scanner, since otherwise this test can trigger
- // HDFS-2525, which is a different bug than we're trying to unit test
- // here! When HDFS-2525 is fixed, this can be removed.
- conf.setInt(DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_KEY, -1);
-
conf.set("fs.hdfs.impl.disable.cache", "true");
cluster = new MiniDFSCluster.Builder(conf)
.numDataNodes(1)