diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 11b67eb0d96..0ab243cc285 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -210,6 +210,9 @@ Trunk (unreleased changes)
     dfs.client.block.write.replace-datanode-on-failure.enable to be mistakenly
     disabled. (atm)
 
+    HDFS-2525. Race between BlockPoolSliceScanner and append. (Brandon Li
+    via jitendra)
+
 Release 0.23.2 - UNRELEASED
 
   INCOMPATIBLE CHANGES
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceScanner.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceScanner.java
index e3709463b41..54c1b6f3952 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceScanner.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceScanner.java
@@ -51,11 +51,8 @@ import org.apache.hadoop.hdfs.util.DataTransferThrottler;
 import org.apache.hadoop.io.IOUtils;
 
 /**
- * Performs two types of scanning:
- * <li> Gets block files from the data directories and reconciles the
- * difference between the blocks on the disk and in memory.</li>
- * <li> Scans the data directories for block files under a block pool
- * and verifies that the files are not corrupt</li>
+ * Scans the block files under a block pool and verifies that the
+ * files are not corrupt.
  * This keeps track of blocks and their last verification times.
  * Currently it does not modify the metadata for block.
  */
@@ -430,6 +427,19 @@ class BlockPoolSliceScanner {
         return;
       }
 
+      // If the block exists, the exception may be due to a race with a write:
+      // the BlockSender got an old block path in rbw. BlockReceiver moved the
+      // block from rbw to finalized, but BlockSender tried to open the file
+      // before BlockReceiver updated the volume map. The state of the block
+      // can change again now, so ignore this error here. If a block really
+      // was deleted by mistake, the DirectoryScanner should catch it.
+      if (e instanceof FileNotFoundException) {
+        LOG.info("Verification failed for " + block +
+            ". It may be due to race with write.");
+        deleteBlock(block.getLocalBlock());
+        return;
+      }
+
       LOG.warn((second ? "Second " : "First ") + "Verification failed for " +
           block, e);
 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestAppendDifferentChecksum.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestAppendDifferentChecksum.java
index f296419bde5..9fbb7605d44 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestAppendDifferentChecksum.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestAppendDifferentChecksum.java
@@ -47,12 +47,6 @@ public class TestAppendDifferentChecksum {
   public static void setupCluster() throws IOException {
     Configuration conf = new HdfsConfiguration();
     conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 4096);
-
-    // disable block scanner, since otherwise this test can trigger
-    // HDFS-2525, which is a different bug than we're trying to unit test
-    // here! When HDFS-2525 is fixed, this can be removed.
-    conf.setInt(DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_KEY, -1);
-
     conf.set("fs.hdfs.impl.disable.cache", "true");
     cluster = new MiniDFSCluster.Builder(conf)
       .numDataNodes(1)
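For context on the race the new comment describes, below is a minimal, self-contained sketch using only the JDK. It is not Hadoop code: the class name ScannerRaceSketch, the rbw/finalized directory layout, and the file names are invented for illustration. A "writer" thread finalizes a replica by moving it out of an rbw directory, roughly what append/close does on a DataNode, while the scanner still holds the stale path; the resulting FileNotFoundException is treated as a benign race rather than corruption, mirroring the handling added to BlockPoolSliceScanner above.

// Standalone sketch (not Hadoop code): illustrates why opening a replica by a
// previously resolved path can fail with FileNotFoundException when the
// replica is concurrently finalized, and why that failure is not corruption.
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

public class ScannerRaceSketch {
  public static void main(String[] args) throws Exception {
    Path dir = Files.createTempDirectory("bp-slice");
    Path rbw = Files.createDirectories(dir.resolve("rbw"));
    Path finalized = Files.createDirectories(dir.resolve("finalized"));
    // The "scanner" resolves the replica path while it is still in rbw.
    Path staleBlockPath = Files.write(rbw.resolve("blk_1"), new byte[]{1, 2, 3});

    // Writer: finalize the replica by moving it, as BlockReceiver would.
    Thread writer = new Thread(() -> {
      try {
        Files.move(staleBlockPath, finalized.resolve("blk_1"));
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    });
    writer.start();
    writer.join();  // join() makes the "race" deterministic for this demo

    // Scanner: still holds the stale rbw path, so the open fails. The patch's
    // choice is to log, drop the block from the scan queue (deleteBlock(...)
    // in BlockPoolSliceScanner), and move on instead of flagging corruption.
    try (FileInputStream in = new FileInputStream(staleBlockPath.toFile())) {
      System.out.println("read " + in.read() + " (no race this time)");
    } catch (FileNotFoundException e) {
      System.out.println("Verification failed for blk_1."
          + " It may be due to a race with write; skipping this scan.");
      // A replica that is genuinely gone is left for a later pass
      // (the DirectoryScanner, in HDFS) to reconcile.
    }
  }
}

As the diff's comment notes, the patch only skips the current verification attempt; a block that really was deleted by mistake is expected to be caught by the DirectoryScanner rather than by this scan.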